install.packages('corpcor')
install.packages('mctest')
require(mgcv)
require(corpcor)
require(mctest)


#Importing datasets

# Load the modelling data set (comma-separated, with a header row).
all_sellerdata <- read.csv(
  file = "C:\\Users\\f00456n\\Documents\\Amazon Price Dynamics\\Data Scraped\\New Data\\Modeling data sets\\Electric Cooker\\finalnumsellers_dataJuly2025_ecooker.csv",
  header = TRUE,
  sep = ","
)

# The modelling window is rows 4..454 of the file (451 rows); every train/test
# index range used further down assumes exactly this window.
# Indexing a data frame past its last row returns all-NA rows, and the
# zero-fill below would silently turn those into fake observations, so fail
# fast when the file is shorter than expected.
stopifnot(nrow(all_sellerdata) >= 454)
modeldata <- all_sellerdata[4:454, ]

# Missing values are replaced by 0 in every column.
modeldata[is.na(modeldata)] <- 0

# attach() puts each column of modeldata on the search path; the rest of the
# script refers to columns by bare name. The attached copy is a snapshot, so
# later changes to modeldata are not visible through those names.
attach(modeldata)

# Response matrix, one column per series. Later code addresses the columns by
# position (1 = Y_amzn_breville, 2 = Y_amzn_cosori, 3 = Y_amzn_crockp, ...).
trainY <- cbind(
  Y_amzn_breville, Y_amzn_cosori, Y_amzn_crockp,
  Y_amzn_ep, Y_amzn_ip,
  Y_clust2_ep, Y_clust3_crockp, Y_clust4_breville, Y_clust5_crockp
)

#AMAZON BREVILLE
# Candidate predictors for the Amazon/Breville model. Every name below is a
# column reached through attach(modeldata). cbind() assembles them into a
# matrix before data.frame() converts it, so the column order follows the
# order of this listing.
data_amzn_brev <- data.frame(cbind(
  # own lags of the response
  Y_amzn_breville_lag1, Y_amzn_breville_lag2, Y_amzn_breville_lag3,
  # further *_lag covariates
  min_cumsalefreq_clust4_lag1, Y_amzn_cosori_lag1,
  mean_uniq_ASIN_clust5_lag1, Freeship_used3p_lag1,
  Y_amzn_cosori_lag2, buyboxprice_brv_lag1,
  Y_clust5_crockp_lag1, salesrank_cat_elec_lag1,
  max_salesrank_categ_IP_lag1, num_brvclust2_lag1,
  Y_clust4_breville_lag1, min_cumsalefreq_clust3_lag1,
  Y_clust3_crockp_lag3, Y_clust4_breville_lag2,
  buyboxprice_EP_lag1, Y_clust5_crockp_lag2,
  # experiment additions
  Y_amzn_ep_lag1, Y_amzn_ip_lag1, Y_clust2_ep_lag1, Y_clust3_crockp_lag1,
  # FE with JMRnR
  priceAmzn_Brev_lag, lagdiff_Amznbrev,
  brv_bottombrandlag1, brv_topbrandlag1,
  crockp_bottombrandlag1, crockp_topbrandlag1,
  csri_bottombrandlag1, csri_topbrandlag1,
  ep_bottombrandlag1, ep_topbrandlag1,
  ip_bottombrandlag1, ip_topbrandlag1,
  # decr10per_* columns
  decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri,
  decr10per_amzn_ep, decr10per_amzn_ip, decr10per_clust1_brev,
  decr10per_clust3_crockp, decr10per_clust4_brev, decr10per_clust5_crockp,
  # decr20per_* columns
  decr20per_3p_ep, decr20per_amzn_brev, decr20per_amzn_csri,
  decr20per_amzn_ep, decr20per_amzn_ip, decr20per_clust1_brev,
  decr20per_clust3_crockp, decr20per_clust4_brev, decr20per_clust5_crockp,
  # decr5per_* columns
  decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri,
  decr5per_amzn_ep, decr5per_amzn_ip, decr5per_clust1_brev,
  decr5per_clust3_crockp, decr5per_clust4_brev, decr5per_clust5_crockp,
  # incr10per_* columns
  incr10per_3p_ep, incr10per_amzn_brev, incr10per_amzn_csri,
  incr10per_amzn_ep, incr10per_amzn_ip, incr10per_clust1_brev,
  incr10per_clust3_crockp, incr10per_clust4_brev, incr10per_clust5_crockp,
  # incr20per_* columns
  incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri,
  incr20per_amzn_ep, incr20per_amzn_ip, incr20per_clust1_brev,
  incr20per_clust3_crockp, incr20per_clust4_brev, incr20per_clust5_crockp,
  # incr5per_* columns
  incr5per_3p_ep, incr5per_amzn_brev, incr5per_amzn_csri,
  incr5per_amzn_ep, incr5per_amzn_ip, incr5per_clust1_brev,
  incr5per_clust3_crockp, incr5per_clust4_brev, incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1, Product_reviews_EP_lag1,
  Product_reviews_brv_lag1, Product_reviews_cosori_lag1,
  Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_starrateBR_3p_lag, mean_product_star_rate_IP_lag1,
  mean_seller_star_rating_clust4,
  product_star_avbay_lag1, product_star_brv_lag1, product_star_cmax_lag1,
  product_star_cnart_lag1, product_star_cosori_lag1, product_star_crockp_lag1,
  product_star_elec_lag1, product_star_EP_lag1, product_star_gkchf_lag1,
  product_star_hbeach_lag1, product_star_mmill_lag1, product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  nonbbox3pprice_brev_lag,
  # broken down external site price changes with lags
  maxpricechange_HD, maxpricechange_HDlag1,
  maxpricechange_HDlag2, maxpricechange_HDlag3,
  maxpricechange_sears, maxpricechange_searslag1,
  maxpricechange_searslag2, maxpricechange_searslag3,
  maxpricechange_wm, maxpricechange_wmlag1,
  maxpricechange_wmlag2, maxpricechange_wmlag3
))

# ZERO-VARIANCE SCREEN (Amazon/Breville predictors)
# Constant columns are removed from data_amzn_brev. The variance is taken over
# every row of the frame, i.e. before the train/test split further down.

# Per-column sample variance, in column order.
variance_X <- vapply(data_amzn_brev, var, numeric(1), USE.NAMES = FALSE)

# number of variables with non-zero variance
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance. drop = FALSE keeps the result a
# data frame even when a single column survives; without it the subset would
# collapse to a bare vector and the ncol()/row-indexing code below would break.
trainX_retained <- data_amzn_brev[, variance_X != 0, drop = FALSE]

# Variances of the retained columns (all non-zero by construction).
variance_Xret <- vapply(trainX_retained, var, numeric(1), USE.NAMES = FALSE)

# Column summaries for inspection only; no indicator-specific transformation
# is applied here.
summary(trainX_retained)

data_amzn_brev <- trainX_retained

# Interleaved hold-out split over the 451 modelling rows:
#   train = rows 1-90, 181-271, 362-421; test = rows 91-180, 272-361, 422-451.
# Column 1 of trainY is Y_amzn_breville.
trainY_amzn_brev <- c(
  trainY[1:90, 1],
  trainY[181:271, 1],
  trainY[362:421, 1]
)
train_amzn_gam_brev <- rbind(
  data_amzn_brev[1:90, ],
  data_amzn_brev[181:271, ],
  data_amzn_brev[362:421, ]
)

test1_amzn_gam_brev <- data_amzn_brev[91:180, ]
test2_amzn_gam_brev <- data_amzn_brev[272:361, ]
test3_amzn_gam_brev <- data_amzn_brev[422:451, ]
test_amzn_gam_brev <- rbind(
  test1_amzn_gam_brev,
  test2_amzn_gam_brev,
  test3_amzn_gam_brev
)
testY_amzn_gam_brev <- c(
  Y_amzn_breville[91:180],
  Y_amzn_breville[272:361],
  Y_amzn_breville[422:451]
)

# Gaussian GAM with purely parametric (linear) terms, fitted by REML.
# The response vector is not a column of `data`; it is resolved from the
# environment of the formula.
# NOTE(review): a formula term that is absent from `data` (for example because
# the zero-variance screen removed it) would be looked up on the search path
# instead, where the attached columns have 451 rows - confirm every term is
# present in train_amzn_gam_brev.
gam3_amzn_brev <- gam(
  trainY_amzn_brev ~
    decr5per_amzn_brev +
    Product_reviews_crockp_lag1 +
    weekend +
    buyboxprice_brv_lag1 +
    incr5per_amzn_ip +
    incr5per_amzn_brev +
    max_Product_reviews_IP_lag1 +
    Y_amzn_breville_lag2 +
    incr20per_clust4_brev +
    incr5per_amzn_ep +
    incr5per_clust4_brev +
    Product_reviews_cosori_lag1 +
    Y_amzn_breville_lag3 +
    product_star_brv_lag1 +
    product_star_cosori_lag1 +
    mean_seller_star_rating_clust4 +
    seasonal_sale +
    maxpricechange_wmlag2,
  family = gaussian(link = "identity"),
  data = train_amzn_gam_brev,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Out-of-sample predictions on the stacked test rows.
pred_amzn_gam3 <- predict(gam3_amzn_brev, newdata = test_amzn_gam_brev)

# Mean and standard deviation of the squared forecast errors.
MSE_amzn_gam3 <- mean((testY_amzn_gam_brev - pred_amzn_gam3)^2)
SDSE_amzn_gam3 <- sd((testY_amzn_gam_brev - pred_amzn_gam3)^2)

summary(gam3_amzn_brev)

# Actual vs. predicted, written to the current working directory.
forecast_amznbrev <- cbind(testY_amzn_gam_brev, pred_amzn_gam3)
write.csv(forecast_amznbrev, file = "forecast_amznbrev.csv")

#AMAZON COSORI

# Candidate predictors for the Amazon/Cosori model. Every name below is a
# column reached through attach(modeldata). cbind() assembles them into a
# matrix before data.frame() converts it, so the column order follows the
# order of this listing.
data_amzn_COSORI <- data.frame(cbind(
  # own lags of the response
  Y_amzn_cosori_lag1, Y_amzn_cosori_lag2, Y_amzn_cosori_lag3,
  # further *_lag covariates
  min_cumsalefreq_clust4_lag1, mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1, buyboxprice_brv_lag1,
  Y_amzn_breville_lag2, Y_clust5_crockp_lag1,
  salesrank_cat_elec_lag1, max_salesrank_categ_IP_lag1,
  num_brvclust2_lag1, Y_clust4_breville_lag1,
  min_cumsalefreq_clust3_lag1, Y_clust3_crockp_lag3,
  Y_clust4_breville_lag2, buyboxprice_EP_lag1,
  buyboxprice_cosori_lag1, Y_clust5_crockp_lag2,
  # FE with JMRnR
  brv_bottombrandlag1, brv_topbrandlag1,
  crockp_bottombrandlag1, crockp_topbrandlag1,
  csri_bottombrandlag1, csri_topbrandlag1,
  ep_bottombrandlag1, ep_topbrandlag1,
  ip_bottombrandlag1, ip_topbrandlag1,
  # decr10per_* columns
  decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri,
  decr10per_amzn_ep, decr10per_amzn_ip, decr10per_clust1_brev,
  decr10per_clust3_crockp, decr10per_clust4_brev, decr10per_clust5_crockp,
  # decr20per_* columns
  decr20per_3p_ep, decr20per_amzn_brev, decr20per_amzn_csri,
  decr20per_amzn_ep, decr20per_amzn_ip, decr20per_clust1_brev,
  decr20per_clust3_crockp, decr20per_clust4_brev, decr20per_clust5_crockp,
  # decr5per_* columns
  decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri,
  decr5per_amzn_ep, decr5per_amzn_ip, decr5per_clust1_brev,
  decr5per_clust3_crockp, decr5per_clust4_brev, decr5per_clust5_crockp,
  # incr10per_* columns
  incr10per_3p_ep, incr10per_amzn_brev, incr10per_amzn_csri,
  incr10per_amzn_ep, incr10per_amzn_ip, incr10per_clust1_brev,
  incr10per_clust3_crockp, incr10per_clust4_brev, incr10per_clust5_crockp,
  # incr20per_* columns
  incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri,
  incr20per_amzn_ep, incr20per_amzn_ip, incr20per_clust1_brev,
  incr20per_clust3_crockp, incr20per_clust4_brev, incr20per_clust5_crockp,
  # incr5per_* columns
  incr5per_3p_ep, incr5per_amzn_brev, incr5per_amzn_csri,
  incr5per_amzn_ep, incr5per_amzn_ip, incr5per_clust1_brev,
  incr5per_clust3_crockp, incr5per_clust4_brev, incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1, Product_reviews_EP_lag1,
  Product_reviews_brv_lag1, Product_reviews_cosori_lag1,
  Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_starrateCR_3p_lag,
  mean_sellerstar_clust1_lag1, mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1, mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  mean_product_star_rate_IP_lag1,
  product_star_avbay_lag1, product_star_brv_lag1, product_star_cmax_lag1,
  product_star_cnart_lag1, product_star_cosori_lag1, product_star_crockp_lag1,
  product_star_elec_lag1, product_star_EP_lag1, product_star_gkchf_lag1,
  product_star_hbeach_lag1, product_star_mmill_lag1, product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  # broken down external site price changes with lags
  maxpricechange_HD, maxpricechange_HDlag1,
  maxpricechange_HDlag2, maxpricechange_HDlag3,
  maxpricechange_sears, maxpricechange_searslag1,
  maxpricechange_searslag2, maxpricechange_searslag3,
  maxpricechange_wm, maxpricechange_wmlag1,
  maxpricechange_wmlag2, maxpricechange_wmlag3
))

# ZERO-VARIANCE SCREEN (Amazon/Cosori predictors)
# Constant columns are removed from data_amzn_COSORI. The variance is taken
# over every row of the frame, i.e. before the train/test split further down.

# Per-column sample variance, in column order.
variance_X <- vapply(data_amzn_COSORI, var, numeric(1), USE.NAMES = FALSE)

# number of variables with non-zero variance
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance. drop = FALSE keeps the result a
# data frame even when a single column survives; without it the subset would
# collapse to a bare vector and the ncol()/row-indexing code below would break.
trainX_retained <- data_amzn_COSORI[, variance_X != 0, drop = FALSE]

# Variances of the retained columns (all non-zero by construction).
variance_Xret <- vapply(trainX_retained, var, numeric(1), USE.NAMES = FALSE)

# Column summaries for inspection only; no indicator-specific transformation
# is applied here.
summary(trainX_retained)

data_amzn_COSORI <- trainX_retained

# Interleaved hold-out split over the 451 modelling rows:
#   train = rows 1-90, 181-271, 362-421; test = rows 91-180, 272-361, 422-451.
# Column 2 of trainY is Y_amzn_cosori.
trainY_amzn_COSORI <- c(
  trainY[1:90, 2],
  trainY[181:271, 2],
  trainY[362:421, 2]
)
train_amzn_gam_COSORI <- rbind(
  data_amzn_COSORI[1:90, ],
  data_amzn_COSORI[181:271, ],
  data_amzn_COSORI[362:421, ]
)

test1_amzn_gam_COSORI <- data_amzn_COSORI[91:180, ]
test2_amzn_gam_COSORI <- data_amzn_COSORI[272:361, ]
test3_amzn_gam_COSORI <- data_amzn_COSORI[422:451, ]
test_amzn_gam_COSORI <- rbind(
  test1_amzn_gam_COSORI,
  test2_amzn_gam_COSORI,
  test3_amzn_gam_COSORI
)
testY_amzn_gam_COSORI <- c(
  Y_amzn_cosori[91:180],
  Y_amzn_cosori[272:361],
  Y_amzn_cosori[422:451]
)

# adding FE to the best GAM model
# Gaussian GAM with purely parametric (linear) terms, fitted by REML. Only the
# terms that are active in the model are listed; candidate terms that had been
# commented out of the formula are not repeated here.
# The response vector is not a column of `data`; it is resolved from the
# environment of the formula.
gam1_amzn_COSORI <- gam(
  trainY_amzn_COSORI ~
    num_brvclust2_lag1 +
    decr10per_amzn_ip +
    decr5per_amzn_brev +
    ip_topbrandlag1 +
    weekend +
    Product_reviews_crockp_lag1 +
    # adding the 2nd RnR comments here
    mean_product_star_rate_IP_lag1 +
    product_star_cosori_lag1 +
    product_star_EP_lag1 +
    buyboxprice_cosori_lag1 +
    seasonal_sale +
    # 4th round additions
    maxpricechange_HDlag2,
  family = gaussian(link = "identity"),
  data = train_amzn_gam_COSORI,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Out-of-sample predictions on the stacked test rows.
pred_amzn_gam1 <- predict(gam1_amzn_COSORI, newdata = test_amzn_gam_COSORI)

# Mean and standard deviation of the squared forecast errors.
MSE_amzn_gam1 <- mean((testY_amzn_gam_COSORI - pred_amzn_gam1)^2)
SDSE_amzn_gam1 <- sd((testY_amzn_gam_COSORI - pred_amzn_gam1)^2)

# Actual vs. predicted, written to the current working directory.
forecast_amzncosori <- cbind(testY_amzn_gam_COSORI, pred_amzn_gam1)
summary(gam1_amzn_COSORI)
write.csv(forecast_amzncosori, file = "forecast_amzncosori.csv")


#AMAZON crockp

# Candidate predictors for the Amazon/crockp model. Every name below is a
# column reached through attach(modeldata). cbind() assembles them into a
# matrix before data.frame() converts it, so the column order follows the
# order of this listing.
#
# Product_reviews_cosori_lag1 and product_star_cosori_lag1 are listed
# explicitly: this list previously named the matching *_crockp_* column twice
# in each of those positions, which data.frame() stored as a redundant ".1"
# copy while the cosori columns were left out.
data_amzn_crockp <- data.frame(cbind(
  # own lags of the response
  Y_amzn_crockp_lag1, Y_amzn_crockp_lag2, Y_amzn_crockp_lag3,
  # further *_lag covariates
  min_cumsalefreq_clust4_lag1, mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1, buyboxprice_brv_lag1,
  Y_amzn_breville_lag2, Y_clust5_crockp_lag1,
  salesrank_cat_elec_lag1, max_salesrank_categ_IP_lag1,
  num_brvclust2_lag1, Y_clust4_breville_lag1,
  min_cumsalefreq_clust3_lag1, Y_clust3_crockp_lag3,
  Y_clust4_breville_lag2, buyboxprice_EP_lag1,
  buyboxprice_crockp_lag1, Y_clust5_crockp_lag2,
  # FE with JMRnR
  brv_bottombrandlag1, brv_topbrandlag1,
  crockp_bottombrandlag1, crockp_topbrandlag1,
  csri_bottombrandlag1, csri_topbrandlag1,
  ep_bottombrandlag1, ep_topbrandlag1,
  ip_bottombrandlag1, ip_topbrandlag1,
  # decr10per_* columns
  decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri,
  decr10per_amzn_ep, decr10per_amzn_ip, decr10per_clust1_brev,
  decr10per_clust3_crockp, decr10per_clust4_brev, decr10per_clust5_crockp,
  # decr20per_* columns
  decr20per_3p_ep, decr20per_amzn_brev, decr20per_amzn_csri,
  decr20per_amzn_ep, decr20per_amzn_ip, decr20per_clust1_brev,
  decr20per_clust3_crockp, decr20per_clust4_brev, decr20per_clust5_crockp,
  # decr5per_* columns
  decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri,
  decr5per_amzn_ep, decr5per_amzn_ip, decr5per_clust1_brev,
  decr5per_clust3_crockp, decr5per_clust4_brev, decr5per_clust5_crockp,
  # incr10per_* columns
  incr10per_3p_ep, incr10per_amzn_brev, incr10per_amzn_csri,
  incr10per_amzn_ep, incr10per_amzn_ip, incr10per_clust1_brev,
  incr10per_clust3_crockp, incr10per_clust4_brev, incr10per_clust5_crockp,
  # incr20per_* columns
  incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri,
  incr20per_amzn_ep, incr20per_amzn_ip, incr20per_clust1_brev,
  incr20per_clust3_crockp, incr20per_clust4_brev, incr20per_clust5_crockp,
  # incr5per_* columns
  incr5per_3p_ep, incr5per_amzn_brev, incr5per_amzn_csri,
  incr5per_amzn_ep, incr5per_amzn_ip, incr5per_clust1_brev,
  incr5per_clust3_crockp, incr5per_clust4_brev, incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1, Product_reviews_EP_lag1,
  Product_reviews_brv_lag1, Product_reviews_cosori_lag1,
  Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_starrateCR_3p_lag,
  mean_sellerstar_clust1_lag1, mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1, mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  mean_product_star_rate_IP_lag1,
  product_star_avbay_lag1, product_star_brv_lag1, product_star_cmax_lag1,
  product_star_cnart_lag1, product_star_cosori_lag1, product_star_crockp_lag1,
  product_star_elec_lag1, product_star_EP_lag1, product_star_gkchf_lag1,
  product_star_hbeach_lag1, product_star_mmill_lag1, product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  # broken down external site price changes with lags
  maxpricechange_HD, maxpricechange_HDlag1,
  maxpricechange_HDlag2, maxpricechange_HDlag3,
  maxpricechange_sears, maxpricechange_searslag1,
  maxpricechange_searslag2, maxpricechange_searslag3,
  maxpricechange_wm, maxpricechange_wmlag1,
  maxpricechange_wmlag2, maxpricechange_wmlag3
))

# VARIANCE CHECK
# Sample variance of every candidate predictor column of data_amzn_crockp.
# NOTE(review): the variances are computed over all rows, i.e. before the
# train/hold-out split below, so hold-out rows influence which columns are kept.
variance_X <- vapply(
  seq_len(ncol(data_amzn_crockp)),
  function(j) var(data_amzn_crockp[, j]),
  numeric(1)
)

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance.
# drop = FALSE keeps a data frame even when exactly one column survives;
# without it the result collapses to a bare vector, ncol() returns NULL and
# the second variance pass below fails.
trainX_retained <- data_amzn_crockp[, variance_X != 0, drop = FALSE]

# VARIANCE CHECK (second pass, on the retained columns only)
variance_Xret <- vapply(
  seq_len(ncol(trainX_retained)),
  function(j) var(trainX_retained[, j]),
  numeric(1)
)

# TREATING INDICATOR VARIABLES
summary(trainX_retained)

data_amzn_crockp <- trainX_retained

# Train / hold-out split by fixed row blocks:
#   training rows: 1-90, 181-271, 362-421
#   hold-out rows: 91-180, 272-361, 422-451
# Column 3 of trainY is Y_amzn_crockp (see the cbind() that builds trainY near
# the top of the file).
trainY_amzn_crockp <- c(trainY[1:90, 3], trainY[181:271, 3], trainY[362:421, 3])
train_amzn_gam_crockp <- rbind(
  data_amzn_crockp[1:90, , drop = FALSE],
  data_amzn_crockp[181:271, , drop = FALSE],
  data_amzn_crockp[362:421, , drop = FALSE]
)
test1_amzn_gam_crockp <- data_amzn_crockp[91:180, , drop = FALSE]
test2_amzn_gam_crockp <- data_amzn_crockp[272:361, , drop = FALSE]
test3_amzn_gam_crockp <- data_amzn_crockp[422:451, , drop = FALSE]
test_amzn_gam_crockp <- rbind(
  test1_amzn_gam_crockp,
  test2_amzn_gam_crockp,
  test3_amzn_gam_crockp
)
# Hold-out response, read from the attached Y_amzn_crockp column.
testY_amzn_gam_crockp <- c(
  Y_amzn_crockp[91:180],
  Y_amzn_crockp[272:361],
  Y_amzn_crockp[422:451]
)

# adding FE to the best GAM model
# Gaussian / identity-link GAM for trainY_amzn_crockp, fitted by REML on the
# training rows. The formula contains no smooth terms, so every predictor
# enters linearly.
#
# Candidate terms currently left out of the formula (previously commented out
# inline): incr20per_clust4_brev, incr5per_amzn_brev, incr5per_amzn_ip,
# incr5per_clust5_crockp, mean_sellerstar_clust5_lag1,
# min_cumsalefreq_clust3_lag1, decr10per_amzn_brev, decr10per_clust5_crockp,
# incr20per_clust3_crockp, ip_topbrandlag1, mean_starrateCR_3p_lag,
# product_star_crockp_lag1, decr5per_clust5_crockp, incr10per_clust5_crockp,
# max_Product_reviews_IP_lag1, Y_amzn_crockp_lag1.
gam1_amzn_crockp <- gam(
  trainY_amzn_crockp ~
    weekend +
    crockp_topbrandlag1 +
    decr10per_amzn_ip +
    incr5per_clust3_crockp +
    mean_product_star_rate_IP_lag1 +
    mean_sellerstar_clust3_lag1 +
    min_cumsalefreq_clust4_lag1 +
    Product_reviews_crockp_lag1 +
    buyboxprice_crockp_lag1 +
    decr20per_amzn_csri +
    decr20per_clust3_crockp +
    mean_sellerstar_clust4_lag1 +
    seasonal_sale +
    maxpricechange_searslag1,
  family = gaussian(link = "identity"),
  data = train_amzn_gam_crockp,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Predictions for the stacked hold-out rows.
pred_amzn_gam1 <- predict(gam1_amzn_crockp, newdata = test_amzn_gam_crockp)

# Mean and standard deviation of the squared hold-out errors.
MSE_amzn_gam1 <- mean((testY_amzn_gam_crockp - pred_amzn_gam1)^2)
SDSE_amzn_gam1 <- sd((testY_amzn_gam_crockp - pred_amzn_gam1)^2)

# Observed vs. predicted hold-out values, side by side.
forecast_amzncrockp <- cbind(testY_amzn_gam_crockp, pred_amzn_gam1)
summary(gam1_amzn_crockp)
write.csv(forecast_amzncrockp, file = "forecast_amzncrockp.csv")


# AMAZON ELITE PLATINUM

# Candidate predictor table for the Amazon Elite Platinum model.
# The bare column names are resolved through the search path (modeldata is
# attach()ed near the top of the file). cbind() first builds one matrix, which
# data.frame() then converts, so column order here is the column order of
# data_amzn_EP.
data_amzn_EP <- data.frame(cbind(
  # lags 1-3 of Y_amzn_ep
  Y_amzn_ep_lag1,
  Y_amzn_ep_lag2,
  Y_amzn_ep_lag3,
  # lagged covariates shared with the other models
  min_cumsalefreq_clust4_lag1,
  Y_amzn_cosori_lag1,
  mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1,
  Y_amzn_cosori_lag2,
  buyboxprice_brv_lag1,
  Y_amzn_breville_lag2,
  Y_clust5_crockp_lag1,
  salesrank_cat_elec_lag1,
  max_salesrank_categ_IP_lag1,
  num_brvclust2_lag1,
  Y_clust4_breville_lag1,
  min_cumsalefreq_clust3_lag1,
  Y_clust3_crockp_lag3,
  Y_clust4_breville_lag2,
  buyboxprice_EP_lag1,
  Y_clust5_crockp_lag2,
  # FE with JMRnR
  brv_bottombrandlag1,
  brv_topbrandlag1,
  crockp_bottombrandlag1,
  crockp_topbrandlag1,
  csri_bottombrandlag1,
  csri_topbrandlag1,
  ep_bottombrandlag1,
  ep_topbrandlag1,
  ip_bottombrandlag1,
  ip_topbrandlag1,
  # decr* columns (10 / 20 / 5 per)
  decr10per_3p_ep,
  decr10per_amzn_brev,
  decr10per_amzn_csri,
  decr10per_amzn_ep,
  decr10per_amzn_ip,
  decr10per_clust1_brev,
  decr10per_clust3_crockp,
  decr10per_clust4_brev,
  decr10per_clust5_crockp,
  decr20per_3p_ep,
  decr20per_amzn_brev,
  decr20per_amzn_csri,
  decr20per_amzn_ep,
  decr20per_amzn_ip,
  decr20per_clust1_brev,
  decr20per_clust3_crockp,
  decr20per_clust4_brev,
  decr20per_clust5_crockp,
  decr5per_3p_ep,
  decr5per_amzn_brev,
  decr5per_amzn_csri,
  decr5per_amzn_ep,
  decr5per_amzn_ip,
  decr5per_clust1_brev,
  decr5per_clust3_crockp,
  decr5per_clust4_brev,
  decr5per_clust5_crockp,
  # incr* columns (10 / 20 / 5 per)
  incr10per_3p_ep,
  incr10per_amzn_brev,
  incr10per_amzn_csri,
  incr10per_amzn_ep,
  incr10per_amzn_ip,
  incr10per_clust1_brev,
  incr10per_clust3_crockp,
  incr10per_clust4_brev,
  incr10per_clust5_crockp,
  incr20per_3p_ep,
  incr20per_amzn_brev,
  incr20per_amzn_csri,
  incr20per_amzn_ep,
  incr20per_amzn_ip,
  incr20per_clust1_brev,
  incr20per_clust3_crockp,
  incr20per_clust4_brev,
  incr20per_clust5_crockp,
  incr5per_3p_ep,
  incr5per_amzn_brev,
  incr5per_amzn_csri,
  incr5per_amzn_ep,
  incr5per_amzn_ip,
  incr5per_clust1_brev,
  incr5per_clust3_crockp,
  incr5per_clust4_brev,
  incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1,
  Product_reviews_EP_lag1,
  Product_reviews_brv_lag1,
  Product_reviews_cosori_lag1,
  Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_sellerstar_clust1_lag1,
  mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1,
  mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  mean_product_star_rate_IP_lag1,
  product_star_avbay_lag1,
  product_star_brv_lag1,
  product_star_cmax_lag1,
  product_star_cnart_lag1,
  product_star_cosori_lag1,
  product_star_crockp_lag1,
  product_star_elec_lag1,
  product_star_EP_lag1,
  product_star_gkchf_lag1,
  product_star_hbeach_lag1,
  product_star_mmill_lag1,
  product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  # broken down external site price changes with lags
  maxpricechange_HD,
  maxpricechange_HDlag1,
  maxpricechange_HDlag2,
  maxpricechange_HDlag3,
  maxpricechange_sears,
  maxpricechange_searslag1,
  maxpricechange_searslag2,
  maxpricechange_searslag3,
  maxpricechange_wm,
  maxpricechange_wmlag1,
  maxpricechange_wmlag2,
  maxpricechange_wmlag3
))

# VARIANCE CHECK
# Sample variance of every candidate predictor column of data_amzn_EP.
# NOTE(review): the variances are computed over all rows, i.e. before the
# train/hold-out split below, so hold-out rows influence which columns are kept.
variance_X <- vapply(
  seq_len(ncol(data_amzn_EP)),
  function(j) var(data_amzn_EP[, j]),
  numeric(1)
)

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance.
# drop = FALSE keeps a data frame even when exactly one column survives;
# without it the result collapses to a bare vector, ncol() returns NULL and
# the second variance pass below fails.
trainX_retained <- data_amzn_EP[, variance_X != 0, drop = FALSE]

# VARIANCE CHECK (second pass, on the retained columns only)
variance_Xret <- vapply(
  seq_len(ncol(trainX_retained)),
  function(j) var(trainX_retained[, j]),
  numeric(1)
)

# TREATING INDICATOR VARIABLES
summary(trainX_retained)

data_amzn_EP <- trainX_retained

# Train / hold-out split by fixed row blocks:
#   training rows: 1-90, 181-271, 362-421
#   hold-out rows: 91-180, 272-361, 422-451
# Column 4 of trainY is Y_amzn_ep (see the cbind() that builds trainY near the
# top of the file).
trainY_amzn_EP <- c(trainY[1:90, 4], trainY[181:271, 4], trainY[362:421, 4])
train_amzn_gam_EP <- rbind(
  data_amzn_EP[1:90, , drop = FALSE],
  data_amzn_EP[181:271, , drop = FALSE],
  data_amzn_EP[362:421, , drop = FALSE]
)
test1_amzn_gam_EP <- data_amzn_EP[91:180, , drop = FALSE]
test2_amzn_gam_EP <- data_amzn_EP[272:361, , drop = FALSE]
test3_amzn_gam_EP <- data_amzn_EP[422:451, , drop = FALSE]
test_amzn_gam_EP <- rbind(
  test1_amzn_gam_EP,
  test2_amzn_gam_EP,
  test3_amzn_gam_EP
)
# Hold-out response, read from the attached Y_amzn_ep column.
testY_amzn_gam_EP <- c(
  Y_amzn_ep[91:180],
  Y_amzn_ep[272:361],
  Y_amzn_ep[422:451]
)

# adding FE to the best GAM model
# Gaussian / identity-link GAM for trainY_amzn_EP, fitted by REML on the
# training rows. The formula contains no smooth terms, so every predictor
# enters linearly.
#
# Candidate terms currently left out of the formula (previously commented out
# inline): Y_amzn_ep_lag3, mean_uniq_ASIN_clust5_lag1, Freeship_used3p_lag1,
# incr5per_clust4_brev, incr5per_clust5_crockp, ip_topbrandlag1,
# product_star_brv_lag1, product_star_cmax_lag1, product_star_cnart_lag1,
# product_star_elec_lag1, product_star_gkchf_lag1, product_star_hbeach_lag1,
# product_star_mmill_lag1, product_star_oyama_lag1, product_star_ppro_lag1,
# and from the 4th round additions: maxpricechange_HD, maxpricechange_HDlag3,
# maxpricechange_sears, maxpricechange_searslag3, maxpricechange_wm.
gam4_amzn_EP <- gam(
  trainY_amzn_EP ~
    min_cumsalefreq_clust4_lag1 +
    buyboxprice_EP_lag1 +
    decr20per_amzn_ep +
    incr5per_clust3_crockp +
    incr10per_3p_ep +
    weekend +
    product_star_cosori_lag1 +
    product_star_crockp_lag1 +
    product_star_EP_lag1 +
    seasonal_sale +
    # 4th round additions
    maxpricechange_wmlag3,
  family = gaussian(link = "identity"),
  data = train_amzn_gam_EP,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Predictions for the stacked hold-out rows.
pred_amzn_gam4 <- predict(gam4_amzn_EP, newdata = test_amzn_gam_EP)

# Mean and standard deviation of the squared hold-out errors.
MSE_amzn_gam4 <- mean((testY_amzn_gam_EP - pred_amzn_gam4)^2)
SDSE_amzn_gam4 <- sd((testY_amzn_gam_EP - pred_amzn_gam4)^2)
summary(gam4_amzn_EP)

# Observed vs. predicted hold-out values, side by side.
forecast_amznep <- cbind(testY_amzn_gam_EP, pred_amzn_gam4)
write.csv(forecast_amznep, file = "forecast_amznep.csv")

# AMAZON INSTANT POT

# Candidate predictor table for the Amazon Instant Pot model.
# The bare column names are resolved through the search path (modeldata is
# attach()ed near the top of the file). cbind() first builds one matrix, which
# data.frame() then converts, so column order here is the column order of
# data_amzn_IP.
data_amzn_IP <- data.frame(cbind(
  # lags 1-3 of Y_amzn_ip
  Y_amzn_ip_lag1,
  Y_amzn_ip_lag2,
  Y_amzn_ip_lag3,
  # lagged covariates (the two *_buyboxprice_IP columns appear only in this
  # model's table within this part of the file)
  min_cumsalefreq_clust4_lag1,
  max_buyboxprice_IP_lag1,
  mean_buyboxprice_IP_lag1,
  Y_amzn_cosori_lag1,
  mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1,
  Y_amzn_cosori_lag2,
  buyboxprice_brv_lag1,
  Y_amzn_breville_lag2,
  Y_clust5_crockp_lag1,
  salesrank_cat_elec_lag1,
  max_salesrank_categ_IP_lag1,
  num_brvclust2_lag1,
  Y_clust4_breville_lag1,
  min_cumsalefreq_clust3_lag1,
  Y_clust3_crockp_lag3,
  Y_clust4_breville_lag2,
  buyboxprice_EP_lag1,
  Y_clust5_crockp_lag2,
  # FE with JMRnR
  brv_bottombrandlag1,
  brv_topbrandlag1,
  crockp_bottombrandlag1,
  crockp_topbrandlag1,
  csri_bottombrandlag1,
  csri_topbrandlag1,
  ep_bottombrandlag1,
  ep_topbrandlag1,
  ip_bottombrandlag1,
  ip_topbrandlag1,
  # decr* columns (10 / 20 / 5 per)
  decr10per_3p_ep,
  decr10per_amzn_brev,
  decr10per_amzn_csri,
  decr10per_amzn_ep,
  decr10per_amzn_ip,
  decr10per_clust1_brev,
  decr10per_clust3_crockp,
  decr10per_clust4_brev,
  decr10per_clust5_crockp,
  decr20per_3p_ep,
  decr20per_amzn_brev,
  decr20per_amzn_csri,
  decr20per_amzn_ep,
  decr20per_amzn_ip,
  decr20per_clust1_brev,
  decr20per_clust3_crockp,
  decr20per_clust4_brev,
  decr20per_clust5_crockp,
  decr5per_3p_ep,
  decr5per_amzn_brev,
  decr5per_amzn_csri,
  decr5per_amzn_ep,
  decr5per_amzn_ip,
  decr5per_clust1_brev,
  decr5per_clust3_crockp,
  decr5per_clust4_brev,
  decr5per_clust5_crockp,
  # incr* columns (10 / 20 / 5 per)
  incr10per_3p_ep,
  incr10per_amzn_brev,
  incr10per_amzn_csri,
  incr10per_amzn_ep,
  incr10per_amzn_ip,
  incr10per_clust1_brev,
  incr10per_clust3_crockp,
  incr10per_clust4_brev,
  incr10per_clust5_crockp,
  incr20per_3p_ep,
  incr20per_amzn_brev,
  incr20per_amzn_csri,
  incr20per_amzn_ep,
  incr20per_amzn_ip,
  incr20per_clust1_brev,
  incr20per_clust3_crockp,
  incr20per_clust4_brev,
  incr20per_clust5_crockp,
  incr5per_3p_ep,
  incr5per_amzn_brev,
  incr5per_amzn_csri,
  incr5per_amzn_ep,
  incr5per_amzn_ip,
  incr5per_clust1_brev,
  incr5per_clust3_crockp,
  incr5per_clust4_brev,
  incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  mean_sellerstar_clust1_lag1,
  mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1,
  mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  max_Product_reviews_IP_lag1,
  Product_reviews_EP_lag1,
  Product_reviews_brv_lag1,
  Product_reviews_cosori_lag1,
  Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_product_star_rate_IP_lag1,
  product_star_avbay_lag1,
  product_star_brv_lag1,
  product_star_cmax_lag1,
  product_star_cnart_lag1,
  product_star_cosori_lag1,
  product_star_crockp_lag1,
  product_star_elec_lag1,
  product_star_EP_lag1,
  product_star_gkchf_lag1,
  product_star_hbeach_lag1,
  product_star_mmill_lag1,
  product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  # broken down external site price changes with lags
  maxpricechange_HD,
  maxpricechange_HDlag1,
  maxpricechange_HDlag2,
  maxpricechange_HDlag3,
  maxpricechange_sears,
  maxpricechange_searslag1,
  maxpricechange_searslag2,
  maxpricechange_searslag3,
  maxpricechange_wm,
  maxpricechange_wmlag1,
  maxpricechange_wmlag2,
  maxpricechange_wmlag3
))

# VARIANCE CHECK
# Sample variance of every candidate predictor column of data_amzn_IP.
# NOTE(review): the variances are computed over all rows, i.e. before the
# train/hold-out split below, so hold-out rows influence which columns are kept.
variance_X <- vapply(
  seq_len(ncol(data_amzn_IP)),
  function(j) var(data_amzn_IP[, j]),
  numeric(1)
)

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance.
# drop = FALSE keeps a data frame even when exactly one column survives;
# without it the result collapses to a bare vector, ncol() returns NULL and
# the second variance pass below fails.
trainX_retained <- data_amzn_IP[, variance_X != 0, drop = FALSE]

# VARIANCE CHECK (second pass, on the retained columns only)
variance_Xret <- vapply(
  seq_len(ncol(trainX_retained)),
  function(j) var(trainX_retained[, j]),
  numeric(1)
)

# TREATING INDICATOR VARIABLES
summary(trainX_retained)

data_amzn_IP <- trainX_retained

# Train / hold-out split by fixed row blocks:
#   training rows: 1-90, 181-271, 362-421
#   hold-out rows: 91-180, 272-361, 422-451
# Column 5 of trainY is Y_amzn_ip (see the cbind() that builds trainY near the
# top of the file).
trainY_amzn_IP <- c(trainY[1:90, 5], trainY[181:271, 5], trainY[362:421, 5])
train_amzn_gam_IP <- rbind(
  data_amzn_IP[1:90, , drop = FALSE],
  data_amzn_IP[181:271, , drop = FALSE],
  data_amzn_IP[362:421, , drop = FALSE]
)
test1_amzn_gam_IP <- data_amzn_IP[91:180, , drop = FALSE]
test2_amzn_gam_IP <- data_amzn_IP[272:361, , drop = FALSE]
test3_amzn_gam_IP <- data_amzn_IP[422:451, , drop = FALSE]
test_amzn_gam_IP <- rbind(
  test1_amzn_gam_IP,
  test2_amzn_gam_IP,
  test3_amzn_gam_IP
)
# Hold-out response, read from the attached Y_amzn_ip column.
testY_amzn_gam_IP <- c(
  Y_amzn_ip[91:180],
  Y_amzn_ip[272:361],
  Y_amzn_ip[422:451]
)

# adding FE to the best GAM model
# Gaussian / identity-link GAM for trainY_amzn_IP, fitted by REML on the
# training rows. The formula contains no smooth terms, so every predictor
# enters linearly.
#
# Candidate terms currently left out of the formula (previously commented out
# inline):
#   Y_amzn_ip_lag1, Y_amzn_cosori_lag2, Y_clust5_crockp_lag1,
#   num_brvclust2_lag1, Y_clust3_crockp_lag3, ep_bottombrandlag1,
#   decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri,
#   decr10per_amzn_ep, decr10per_clust3_crockp, decr10per_clust4_brev,
#   decr10per_clust5_crockp,
#   decr20per_amzn_brev, decr20per_amzn_csri, decr20per_amzn_ep,
#   decr20per_clust3_crockp, decr20per_clust4_brev, decr20per_clust5_crockp,
#   decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri, decr5per_amzn_ep,
#   decr5per_amzn_ip, decr5per_clust3_crockp, decr5per_clust4_brev,
#   incr10per_3p_ep, incr10per_amzn_csri, incr10per_amzn_ep,
#   incr10per_amzn_ip, incr10per_clust3_crockp, incr10per_clust4_brev,
#   incr10per_clust5_crockp,
#   incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri,
#   incr20per_amzn_ep, incr20per_amzn_ip, incr20per_clust3_crockp,
#   incr20per_clust5_crockp,
#   incr5per_3p_ep, incr5per_amzn_csri, incr5per_amzn_ep,
#   incr5per_clust3_crockp, incr5per_clust4_brev, incr5per_clust5_crockp,
#   Product_reviews_EP_lag1, Product_reviews_brv_lag1,
#   Product_reviews_cosori_lag1,
#   product_star_avbay_lag1, product_star_cmax_lag1, product_star_cnart_lag1,
#   product_star_cosori_lag1, product_star_elec_lag1, product_star_gkchf_lag1,
#   product_star_hbeach_lag1, product_star_mmill_lag1,
#   product_star_oyama_lag1, product_star_ppro_lag1,
#   maxpricechange_HD, maxpricechange_HDlag2, maxpricechange_HDlag3,
#   maxpricechange_sears, maxpricechange_searslag2, maxpricechange_searslag3,
#   maxpricechange_wm, maxpricechange_wmlag2, maxpricechange_wmlag3.
gam5_amzn_IP <- gam(
  trainY_amzn_IP ~
    min_cumsalefreq_clust4_lag1 +
    mean_buyboxprice_IP_lag1 +
    max_buyboxprice_IP_lag1 +
    decr10per_amzn_ip +
    decr20per_amzn_ip +
    decr5per_clust5_crockp +
    Freeship_used3p_lag1 +
    incr5per_amzn_ip +
    incr10per_amzn_brev +
    incr20per_clust4_brev +
    incr5per_amzn_brev +
    ip_topbrandlag1 +
    weekend +
    max_Product_reviews_IP_lag1 +
    Product_reviews_crockp_lag1 +
    # additional FEs based on 2nd RnR
    mean_sellerstar_clust5_lag1 +
    mean_product_star_rate_IP_lag1 +
    product_star_brv_lag1 +
    product_star_crockp_lag1 +
    product_star_EP_lag1 +
    seasonal_sale +
    # 4th round additions
    maxpricechange_HDlag1 +
    maxpricechange_searslag1 +
    maxpricechange_wmlag1,
  family = gaussian(link = "identity"),
  data = train_amzn_gam_IP,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Predictions for the stacked hold-out rows.
pred_amzn_gam5 <- predict(gam5_amzn_IP, newdata = test_amzn_gam_IP)

# Mean and standard deviation of the squared hold-out errors.
MSE_amzn_gam5 <- mean((testY_amzn_gam_IP - pred_amzn_gam5)^2)
SDSE_amzn_gam5 <- sd((testY_amzn_gam_IP - pred_amzn_gam5)^2)
summary(gam5_amzn_IP)

# Observed vs. predicted hold-out values, side by side.
forecast_amznip <- cbind(testY_amzn_gam_IP, pred_amzn_gam5)
write.csv(forecast_amznip, file = "forecast_amznip.csv")


# CLUSTER 2
# trainY column order, per the cbind() near the top of the file:
#   1 Y_amzn_breville, 2 Y_amzn_cosori, 3 Y_amzn_crockp, 4 Y_amzn_ep,
#   5 Y_amzn_ip, 6 Y_clust2_ep, 7 Y_clust3_crockp, 8 Y_clust4_breville,
#   9 Y_clust5_crockp

# Candidate predictor table for the cluster-2 model.
# The bare column names are resolved through the search path (modeldata is
# attach()ed near the top of the file). cbind() first builds one matrix, which
# data.frame() then converts, so column order here is the column order of
# data_clust2.
data_clust2 <- data.frame(cbind(
  # lags 1-3 of Y_clust2_ep
  Y_clust2_ep_lag1,
  Y_clust2_ep_lag2,
  Y_clust2_ep_lag3,
  # lagged covariates shared with the other models
  min_cumsalefreq_clust4_lag1,
  Y_amzn_cosori_lag1,
  mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1,
  Y_amzn_cosori_lag2,
  buyboxprice_brv_lag1,
  Y_amzn_breville_lag2,
  Y_clust5_crockp_lag1,
  salesrank_cat_elec_lag1,
  max_salesrank_categ_IP_lag1,
  num_brvclust2_lag1,
  Y_clust4_breville_lag1,
  min_cumsalefreq_clust3_lag1,
  Y_clust3_crockp_lag3,
  Y_clust4_breville_lag2,
  buyboxprice_EP_lag1,
  Y_clust5_crockp_lag2,
  # FE with JMRnR
  brv_bottombrandlag1,
  brv_topbrandlag1,
  crockp_bottombrandlag1,
  crockp_topbrandlag1,
  csri_bottombrandlag1,
  csri_topbrandlag1,
  ep_bottombrandlag1,
  ep_topbrandlag1,
  ip_bottombrandlag1,
  ip_topbrandlag1,
  # decr* columns (10 / 20 / 5 per)
  decr10per_3p_ep,
  decr10per_amzn_brev,
  decr10per_amzn_csri,
  decr10per_amzn_ep,
  decr10per_amzn_ip,
  decr10per_clust1_brev,
  decr10per_clust3_crockp,
  decr10per_clust4_brev,
  decr10per_clust5_crockp,
  decr20per_3p_ep,
  decr20per_amzn_brev,
  decr20per_amzn_csri,
  decr20per_amzn_ep,
  decr20per_amzn_ip,
  decr20per_clust1_brev,
  decr20per_clust3_crockp,
  decr20per_clust4_brev,
  decr20per_clust5_crockp,
  decr5per_3p_ep,
  decr5per_amzn_brev,
  decr5per_amzn_csri,
  decr5per_amzn_ep,
  decr5per_amzn_ip,
  decr5per_clust1_brev,
  decr5per_clust3_crockp,
  decr5per_clust4_brev,
  decr5per_clust5_crockp,
  # incr* columns (10 / 20 / 5 per)
  incr10per_3p_ep,
  incr10per_amzn_brev,
  incr10per_amzn_csri,
  incr10per_amzn_ep,
  incr10per_amzn_ip,
  incr10per_clust1_brev,
  incr10per_clust3_crockp,
  incr10per_clust4_brev,
  incr10per_clust5_crockp,
  incr20per_3p_ep,
  incr20per_amzn_brev,
  incr20per_amzn_csri,
  incr20per_amzn_ep,
  incr20per_amzn_ip,
  incr20per_clust1_brev,
  incr20per_clust3_crockp,
  incr20per_clust4_brev,
  incr20per_clust5_crockp,
  incr5per_3p_ep,
  incr5per_amzn_brev,
  incr5per_amzn_csri,
  incr5per_amzn_ep,
  incr5per_amzn_ip,
  incr5per_clust1_brev,
  incr5per_clust3_crockp,
  incr5per_clust4_brev,
  incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1,
  Product_reviews_EP_lag1,
  Product_reviews_brv_lag1,
  Product_reviews_cosori_lag1,
  Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_sellerstar_clust1_lag1,
  mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1,
  mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  mean_product_star_rate_IP_lag1,
  product_star_avbay_lag1,
  product_star_brv_lag1,
  product_star_cmax_lag1,
  product_star_cnart_lag1,
  product_star_cosori_lag1,
  product_star_crockp_lag1,
  product_star_elec_lag1,
  product_star_EP_lag1,
  product_star_gkchf_lag1,
  product_star_hbeach_lag1,
  product_star_mmill_lag1,
  product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  # broken down external site price changes with lags
  maxpricechange_HD,
  maxpricechange_HDlag1,
  maxpricechange_HDlag2,
  maxpricechange_HDlag3,
  maxpricechange_sears,
  maxpricechange_searslag1,
  maxpricechange_searslag2,
  maxpricechange_searslag3,
  maxpricechange_wm,
  maxpricechange_wmlag1,
  maxpricechange_wmlag2,
  maxpricechange_wmlag3
))

# Train / hold-out split for the cluster-2 model, by fixed row blocks:
#   training rows: 1-90, 181-271, 362-421
#   hold-out rows: 91-180, 272-361, 422-451
# Column 6 of trainY is Y_clust2_ep (see the cbind() that builds trainY near
# the top of the file).
trainY_clust2 <- c(trainY[1:90, 6], trainY[181:271, 6], trainY[362:421, 6])
train_gam_clust2 <- rbind(
  data_clust2[1:90, ],
  data_clust2[181:271, ],
  data_clust2[362:421, ]
)
test1_gam_clust2 <- data_clust2[91:180, ]
test2_gam_clust2 <- data_clust2[272:361, ]
test3_gam_clust2 <- data_clust2[422:451, ]
test_gam_clust2 <- rbind(test1_gam_clust2, test2_gam_clust2, test3_gam_clust2)
# Hold-out response, read from the attached Y_clust2_ep column.
testY_clust2 <- c(
  Y_clust2_ep[91:180],
  Y_clust2_ep[272:361],
  Y_clust2_ep[422:451]
)


#adding FE to the best GAM model

# Cluster 2 model: Gaussian GAM (identity link) for trainY_clust2, fitted by
# REML on train_gam_clust2. Every term enters linearly; there are no s() smooths.
#
# Only the terms currently in the model are listed. Earlier revisions kept a
# long list of commented-out candidates here (own lags of Y_clust2_ep, lags of
# the other series, the remaining decr*/incr* indicators, review counts,
# product-star columns, mean_sellerstar_clust3_lag1,
# mean_product_star_rate_IP_lag1, ip_topbrandlag1, ep_bottombrandlag1 and the
# HD / sears / wm price-change lags). To experiment with one, add it to the
# formula below.
gamall_clust2 <- gam(
  trainY_clust2 ~
    buyboxprice_EP_lag1 +
    incr10per_3p_ep +
    incr10per_amzn_ep +
    weekend +
    Product_reviews_EP_lag1 +
    # additional FE post 2nd rnd RnR
    product_star_EP_lag1 +
    seasonal_sale +
    # 4th round additions
    maxpricechange_wm,
  family = gaussian(link = "identity"),
  data = train_gam_clust2,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Forecast the held-out rows with the cluster 2 GAM and score the forecast.
# predict() dispatches to mgcv's method for the fitted gam object.
pred_clust2_gamall <- predict(gamall_clust2, newdata = test_gam_clust2)

# Mean squared forecast error on the held-out rows.
MSE_clust2_gamall <- mean((testY_clust2 - pred_clust2_gamall)^2)

summary(gamall_clust2)

# Actual vs. predicted values side by side, written to the working directory.
forecast_clust2 <- cbind(testY_clust2, pred_clust2_gamall)
write.csv(forecast_clust2, file = "forecast_clust2.csv")

#CLUSTER 3
#trainY columns (defined at the top of the file): 1 Y_amzn_breville, 2 Y_amzn_cosori, 3 Y_amzn_crockp, 4 Y_amzn_ep, 5 Y_amzn_ip, 6 Y_clust2_ep, 7 Y_clust3_crockp, 8 Y_clust4_breville, 9 Y_clust5_crockp

# Candidate predictor table for the cluster 3 model.
# cbind() binds the listed vectors (columns of the attached modeldata) into one
# matrix, named after the variables, and data.frame() converts that matrix
# into the frame that is later split into training and test rows.
# Column order is significant only for readability; it is kept exactly as before.
data_clust3 <- data.frame(cbind(
  # lags 1-3 of the cluster 3 series
  Y_clust3_crockp_lag1, Y_clust3_crockp_lag2, Y_clust3_crockp_lag3,
  # baseline predictors
  min_cumsalefreq_clust4_lag1, Y_amzn_cosori_lag1, mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1, Y_amzn_cosori_lag2, buyboxprice_brv_lag1,
  Y_amzn_breville_lag2, Y_clust5_crockp_lag1, salesrank_cat_elec_lag1,
  max_salesrank_categ_IP_lag1, num_brvclust2_lag1, Y_clust4_breville_lag1,
  min_cumsalefreq_clust3_lag1, Y_clust4_breville_lag2, buyboxprice_EP_lag1,
  Y_clust5_crockp_lag2,
  # experiment: first lags of the other series
  # (Y_clust3_crockp_lag1 is not repeated here; it is already the first column)
  Y_amzn_ep_lag1, Y_amzn_ip_lag1, Y_clust2_ep_lag1,
  # FE with JMRnR
  brv_bottombrandlag1, brv_topbrandlag1, buyboxprice_crockp_lag1,
  crockp_bottombrandlag1, crockp_topbrandlag1,
  csri_bottombrandlag1, csri_topbrandlag1,
  ep_bottombrandlag1, ep_topbrandlag1,
  ip_bottombrandlag1, ip_topbrandlag1,
  # decr10per_* columns
  decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri, decr10per_amzn_crockp,
  decr10per_amzn_ep, decr10per_amzn_ip, decr10per_clust1_brev,
  decr10per_clust3_crockp, decr10per_clust4_brev, decr10per_clust5_crockp,
  # decr20per_* columns
  decr20per_3p_ep, decr20per_amzn_brev, decr20per_amzn_csri, decr20per_amzn_crockp,
  decr20per_amzn_ep, decr20per_amzn_ip, decr20per_clust1_brev,
  decr20per_clust3_crockp, decr20per_clust4_brev, decr20per_clust5_crockp,
  # decr5per_* columns
  decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri, decr5per_amzn_crockp,
  decr5per_amzn_ep, decr5per_amzn_ip, decr5per_clust1_brev,
  decr5per_clust3_crockp, decr5per_clust4_brev, decr5per_clust5_crockp,
  # incr10per_* columns
  incr10per_3p_ep, incr10per_amzn_brev, incr10per_amzn_csri, incr10per_amzn_crockp,
  incr10per_amzn_ep, incr10per_amzn_ip, incr10per_clust1_brev,
  incr10per_clust3_crockp, incr10per_clust4_brev, incr10per_clust5_crockp,
  # incr20per_* columns
  incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri, incr20per_amzn_crockp,
  incr20per_amzn_ep, incr20per_amzn_ip, incr20per_clust1_brev,
  incr20per_clust3_crockp, incr20per_clust4_brev, incr20per_clust5_crockp,
  # incr5per_* columns
  incr5per_3p_ep, incr5per_amzn_brev, incr5per_amzn_csri, incr5per_amzn_crockp,
  incr5per_amzn_ep, incr5per_amzn_ip, incr5per_clust1_brev,
  incr5per_clust3_crockp, incr5per_clust4_brev, incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1, Product_reviews_EP_lag1, Product_reviews_brv_lag1,
  Product_reviews_cosori_lag1, Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_sellerstar_clust1_lag1, mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1, mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  mean_product_star_rate_IP_lag1,
  product_star_avbay_lag1, product_star_brv_lag1, product_star_cmax_lag1,
  product_star_cnart_lag1, product_star_cosori_lag1, product_star_crockp_lag1,
  product_star_elec_lag1, product_star_EP_lag1, product_star_gkchf_lag1,
  product_star_hbeach_lag1, product_star_mmill_lag1, product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  # broken down external site price changes with lags
  maxpricechange_HD, maxpricechange_HDlag1, maxpricechange_HDlag2, maxpricechange_HDlag3,
  maxpricechange_sears, maxpricechange_searslag1, maxpricechange_searslag2,
  maxpricechange_searslag3,
  maxpricechange_wm, maxpricechange_wmlag1, maxpricechange_wmlag2, maxpricechange_wmlag3
))

# Train/test split for cluster 3 (response series: Y_clust3_crockp).
# Rows 1:90, 181:271 and 362:421 are used for fitting; rows 91:180, 272:361
# and 422:451 are held out for forecasting.
#
# The response is selected from trainY by column NAME rather than by position
# (previously column 7). trainY is built with cbind() from named vectors, so
# its columns carry those names; selecting by name keeps this split correct if
# columns are ever added to or reordered in trainY, and matches the way
# testY_clust3 below refers to the series.
trainY_clust3 <- c(trainY[1:90, "Y_clust3_crockp"],
                   trainY[181:271, "Y_clust3_crockp"],
                   trainY[362:421, "Y_clust3_crockp"])
train_gam_clust3 <- rbind(data_clust3[1:90, ],
                          data_clust3[181:271, ],
                          data_clust3[362:421, ])

# Held-out predictor rows, kept as three separate pieces and as one stacked frame.
test1_gam_clust3 <- data_clust3[91:180, ]
test2_gam_clust3 <- data_clust3[272:361, ]
test3_gam_clust3 <- data_clust3[422:451, ]
test_gam_clust3 <- rbind(test1_gam_clust3, test2_gam_clust3, test3_gam_clust3)

# Held-out response values, in the same row order as test_gam_clust3.
testY_clust3 <- c(Y_clust3_crockp[91:180], Y_clust3_crockp[272:361], Y_clust3_crockp[422:451])


#adding FE to the best GAM model
# Cluster 3 model: Gaussian GAM (identity link) for trainY_clust3, fitted by
# REML on train_gam_clust3. Every term enters linearly; there are no s() smooths.
#
# Only the terms currently in the model are listed. Earlier revisions kept
# commented-out candidates here: own lags Y_clust3_crockp_lag1/lag3,
# s(Y_clust5_crockp_lag1), s(Y_amzn_cosori_lag1), Y_clust4_breville_lag2,
# Y_amzn_cosori_lag2, the remaining decr*/incr* indicators, ip_topbrandlag1,
# the review-count columns, the remaining product_star_* columns and the
# unlagged HD / sears / wm price changes. All of the plain columns are still
# available in data_clust3; add them to the formula below to experiment.
gam0_clust3 <- gam(
  trainY_clust3 ~
    buyboxprice_crockp_lag1 +
    Freeship_used3p_lag1 +
    decr10per_amzn_brev +
    decr10per_amzn_crockp +
    decr5per_amzn_crockp +
    decr5per_amzn_brev +
    decr5per_clust3_crockp +
    decr5per_clust5_crockp +
    ep_bottombrandlag1 +
    incr10per_amzn_brev +
    incr10per_amzn_ip +
    incr10per_amzn_crockp +
    incr10per_clust5_crockp +
    incr20per_amzn_ip +
    incr20per_clust4_brev +
    incr20per_clust5_crockp +
    incr5per_amzn_brev +
    incr5per_clust3_crockp +
    weekend +
    # additional FEs after 2nd RnR
    mean_sellerstar_clust5_lag1 +
    mean_product_star_rate_IP_lag1 +
    product_star_brv_lag1 +
    product_star_cnart_lag1 +
    product_star_cosori_lag1 +
    product_star_crockp_lag1 +
    product_star_EP_lag1 +
    seasonal_sale +
    # 4th round additions
    maxpricechange_HDlag2 +
    maxpricechange_searslag2 +
    maxpricechange_wmlag2,
  family = gaussian(link = "identity"),
  data = train_gam_clust3,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)


# Forecast the held-out rows with the cluster 3 GAM and score the forecast.
# predict() dispatches to mgcv's method for the fitted gam object.
pred_clust3_gam0 <- predict(gam0_clust3, newdata = test_gam_clust3)

# Mean and standard deviation of the squared forecast errors.
MSE_clust3_gam0 <- mean((testY_clust3 - pred_clust3_gam0)^2)
SDSE_clust3_gam0 <- sd((testY_clust3 - pred_clust3_gam0)^2)

summary(gam0_clust3)

# Actual vs. predicted values side by side, written to the working directory.
forecast_clust3 <- cbind(testY_clust3, pred_clust3_gam0)
write.csv(forecast_clust3, file = "forecast_clust3.csv")


#CLUSTER 4

#trainY columns (defined at the top of the file): 1 Y_amzn_breville, 2 Y_amzn_cosori, 3 Y_amzn_crockp, 4 Y_amzn_ep, 5 Y_amzn_ip, 6 Y_clust2_ep, 7 Y_clust3_crockp, 8 Y_clust4_breville, 9 Y_clust5_crockp

# Candidate predictor table for the cluster 4 model.
# cbind() binds the listed vectors (columns of the attached modeldata) into one
# matrix, named after the variables, and data.frame() converts that matrix
# into the frame that is later split into training and test rows.
# Column order is kept exactly as before.
data_clust4 <- data.frame(cbind(
  # lags 1-3 of the cluster 4 series
  Y_clust4_breville_lag1, Y_clust4_breville_lag2, Y_clust4_breville_lag3,
  # baseline predictors
  min_cumsalefreq_clust4_lag1, Y_amzn_cosori_lag1, mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1, Y_amzn_cosori_lag2, buyboxprice_brv_lag1,
  Y_amzn_breville_lag2, Y_clust5_crockp_lag1, salesrank_cat_elec_lag1,
  max_salesrank_categ_IP_lag1, num_brvclust2_lag1, min_cumsalefreq_clust3_lag1,
  Y_clust3_crockp_lag3, buyboxprice_EP_lag1, Y_clust5_crockp_lag2,
  # FE with JMRnR
  brv_bottombrandlag1, brv_topbrandlag1,
  crockp_bottombrandlag1, crockp_topbrandlag1,
  csri_bottombrandlag1, csri_topbrandlag1,
  ep_bottombrandlag1, ep_topbrandlag1,
  ip_bottombrandlag1, ip_topbrandlag1,
  # decr10per_* columns
  decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri,
  decr10per_amzn_ep, decr10per_amzn_ip, decr10per_clust1_brev,
  decr10per_clust3_crockp, decr10per_clust4_brev, decr10per_clust5_crockp,
  # decr20per_* columns
  decr20per_3p_ep, decr20per_amzn_brev, decr20per_amzn_csri,
  decr20per_amzn_ep, decr20per_amzn_ip, decr20per_clust1_brev,
  decr20per_clust3_crockp, decr20per_clust4_brev, decr20per_clust5_crockp,
  # decr5per_* columns
  decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri,
  decr5per_amzn_ep, decr5per_amzn_ip, decr5per_clust1_brev,
  decr5per_clust3_crockp, decr5per_clust4_brev, decr5per_clust5_crockp,
  # incr10per_* columns
  incr10per_3p_ep, incr10per_amzn_brev, incr10per_amzn_csri,
  incr10per_amzn_ep, incr10per_amzn_ip, incr10per_clust1_brev,
  incr10per_clust3_crockp, incr10per_clust4_brev, incr10per_clust5_crockp,
  # incr20per_* columns
  incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri,
  incr20per_amzn_ep, incr20per_amzn_ip, incr20per_clust1_brev,
  incr20per_clust3_crockp, incr20per_clust4_brev, incr20per_clust5_crockp,
  # incr5per_* columns
  incr5per_3p_ep, incr5per_amzn_brev, incr5per_amzn_csri,
  incr5per_amzn_ep, incr5per_amzn_ip, incr5per_clust1_brev,
  incr5per_clust3_crockp, incr5per_clust4_brev, incr5per_clust5_crockp,
  weekend,
  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1, Product_reviews_EP_lag1, Product_reviews_brv_lag1,
  Product_reviews_cosori_lag1, Product_reviews_crockp_lag1,
  # additional variables after RnR 2
  mean_sellerstar_clust1_lag1, mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1, mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  mean_product_star_rate_IP_lag1,
  product_star_avbay_lag1, product_star_brv_lag1, product_star_cmax_lag1,
  product_star_cnart_lag1, product_star_cosori_lag1, product_star_crockp_lag1,
  product_star_elec_lag1, product_star_EP_lag1, product_star_gkchf_lag1,
  product_star_hbeach_lag1, product_star_mmill_lag1, product_star_oyama_lag1,
  product_star_ppro_lag1,
  seasonal_sale,
  # broken down external site price changes with lags
  maxpricechange_HD, maxpricechange_HDlag1, maxpricechange_HDlag2, maxpricechange_HDlag3,
  maxpricechange_sears, maxpricechange_searslag1, maxpricechange_searslag2,
  maxpricechange_searslag3,
  maxpricechange_wm, maxpricechange_wmlag1, maxpricechange_wmlag2, maxpricechange_wmlag3
))

# Train/test split for cluster 4 (response series: Y_clust4_breville).
# Rows 1:90, 181:271 and 362:421 are used for fitting; rows 91:180, 272:361
# and 422:451 are held out for forecasting.
#
# The response is selected from trainY by column NAME rather than by position
# (previously column 8). trainY is built with cbind() from named vectors, so
# its columns carry those names; selecting by name keeps this split correct if
# columns are ever added to or reordered in trainY, and matches the way
# testY_clust4 below refers to the series.
trainY_clust4 <- c(trainY[1:90, "Y_clust4_breville"],
                   trainY[181:271, "Y_clust4_breville"],
                   trainY[362:421, "Y_clust4_breville"])
train_gam_clust4 <- rbind(data_clust4[1:90, ],
                          data_clust4[181:271, ],
                          data_clust4[362:421, ])

# Held-out predictor rows, kept as three separate pieces and as one stacked frame.
test1_gam_clust4 <- data_clust4[91:180, ]
test2_gam_clust4 <- data_clust4[272:361, ]
test3_gam_clust4 <- data_clust4[422:451, ]
test_gam_clust4 <- rbind(test1_gam_clust4, test2_gam_clust4, test3_gam_clust4)

# Held-out response values, in the same row order as test_gam_clust4.
testY_clust4 <- c(Y_clust4_breville[91:180], Y_clust4_breville[272:361], Y_clust4_breville[422:451])

#with FE from RnR
# Cluster 4 model: Gaussian GAM (identity link) for trainY_clust4, fitted by
# REML on train_gam_clust4. Every term enters linearly; there are no s() smooths.
#
# Only the terms currently in the model are listed. Earlier revisions kept
# commented-out candidates here: own lags Y_clust4_breville_lag1-3,
# Y_amzn_breville_lag2, Freeship_used3p_lag1, salesrank_cat_elec_lag1,
# Y_clust3_crockp_lag3, Y_clust5_crockp_lag2, the remaining decr*/incr*
# indicators, ep_bottombrandlag1, the other review-count columns,
# mean_sellerstar_clust1_lag1 / clust5_lag1, the remaining product_star_*
# columns, mean_primeBR_clust5_lag1 and the HD / sears / wm price changes
# other than maxpricechange_wmlag3. Add them to the formula below to experiment
# (mean_primeBR_clust5_lag1 is not a column of data_clust4).
gam3_clust4 <- gam(
  trainY_clust4 ~
    buyboxprice_brv_lag1 +
    min_cumsalefreq_clust4_lag1 +
    num_brvclust2_lag1 +
    min_cumsalefreq_clust3_lag1 +
    decr10per_amzn_ip +
    decr10per_clust4_brev +
    decr20per_amzn_ep +
    decr20per_clust4_brev +
    decr5per_amzn_ep +
    incr10per_amzn_ip +
    incr10per_clust5_crockp +
    incr20per_clust3_crockp +
    incr20per_clust4_brev +
    incr20per_clust5_crockp +
    incr5per_clust4_brev +
    incr5per_clust5_crockp +
    ip_topbrandlag1 +
    weekend +
    Product_reviews_brv_lag1 +
    # additional variables after RnR 2
    mean_sellerstar_clust2_lag1 +
    mean_sellerstar_clust3_lag1 +
    mean_sellerstar_clust4_lag1 +
    mean_product_star_rate_IP_lag1 +
    product_star_brv_lag1 +
    seasonal_sale +
    # 4th round additions
    maxpricechange_wmlag3,
  family = gaussian(link = "identity"),
  data = train_gam_clust4,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Report the cluster 4 GAM, forecast the held-out rows and score the forecast.
summary(gam3_clust4)

# predict() dispatches to mgcv's method for the fitted gam object.
pred_clust4_gam3 <- predict(gam3_clust4, newdata = test_gam_clust4)

# Mean and standard deviation of the squared forecast errors on the held-out
# rows. This block previously wrote a forecast without computing any error
# metric; the names follow the MSE_/SDSE_ convention used for cluster 3.
MSE_clust4_gam3 <- mean((testY_clust4 - pred_clust4_gam3)^2)
SDSE_clust4_gam3 <- sd((testY_clust4 - pred_clust4_gam3)^2)

# Actual vs. predicted values side by side, written to the working directory.
forecast_clust4 <- cbind(testY_clust4, pred_clust4_gam3)
write.csv(forecast_clust4, file = "forecast_clust4.csv")

#CLUSTER 5

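#NOTE: stale 8-column variant kept for reference only (it omits Y_amzn_crockp); the trainY built near the top of the file has 9 columns, and the code below relies on that one.
#trainY = cbind(Y_amzn_breville,Y_amzn_cosori,Y_amzn_ep,Y_amzn_ip,Y_clust2_ep,Y_clust3_crockp,Y_clust4_breville,Y_clust5_crockp)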

# Predictor table for the cluster-5 models. The columns are bound in exactly
# the order listed below and take their names from the variable names, so
# positions and names in data_clust5 follow this listing.
data_clust5 <- data.frame(cbind(
  # lags of Y_clust5_crockp
  Y_clust5_crockp_lag1, Y_clust5_crockp_lag2, Y_clust5_crockp_lag3,

  # lagged seller / price / rank / cross-series variables
  min_cumsalefreq_clust4_lag1,
  Y_amzn_cosori_lag1,
  mean_uniq_ASIN_clust5_lag1,
  Freeship_used3p_lag1,
  Y_amzn_cosori_lag2,
  buyboxprice_brv_lag1,
  buyboxprice_crockp_lag1,
  Y_amzn_breville_lag2,
  salesrank_cat_elec_lag1,
  max_salesrank_categ_IP_lag1,
  num_brvclust2_lag1,
  Y_clust4_breville_lag1,
  min_cumsalefreq_clust3_lag1,
  Y_clust3_crockp_lag3,
  Y_clust4_breville_lag2,
  buyboxprice_EP_lag1,

  # experiment
  Y_amzn_ep_lag1, Y_amzn_ip_lag1, Y_clust2_ep_lag1, Y_clust3_crockp_lag1,
  # experiment edits end

  # FE with JMRnR
  brv_bottombrandlag1, brv_topbrandlag1,
  crockp_bottombrandlag1, crockp_topbrandlag1,
  csri_bottombrandlag1, csri_topbrandlag1,
  ep_bottombrandlag1, ep_topbrandlag1,
  ip_bottombrandlag1, ip_topbrandlag1,

  # decr10per_* columns
  decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri,
  decr10per_amzn_crockp, decr10per_amzn_ep, decr10per_amzn_ip,
  decr10per_clust1_brev, decr10per_clust3_crockp,
  decr10per_clust4_brev, decr10per_clust5_crockp,

  # decr20per_* columns
  decr20per_3p_ep, decr20per_amzn_brev, decr20per_amzn_csri,
  decr20per_amzn_crockp, decr20per_amzn_ep, decr20per_amzn_ip,
  decr20per_clust1_brev, decr20per_clust3_crockp,
  decr20per_clust4_brev, decr20per_clust5_crockp,

  # decr5per_* columns
  decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri,
  decr5per_amzn_crockp, decr5per_amzn_ep, decr5per_amzn_ip,
  decr5per_clust1_brev, decr5per_clust3_crockp,
  decr5per_clust4_brev, decr5per_clust5_crockp,

  # incr10per_* columns
  incr10per_3p_ep, incr10per_amzn_brev, incr10per_amzn_csri,
  incr10per_amzn_crockp, incr10per_amzn_ep, incr10per_amzn_ip,
  incr10per_clust1_brev, incr10per_clust3_crockp,
  incr10per_clust4_brev, incr10per_clust5_crockp,

  # incr20per_* columns
  incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri,
  incr20per_amzn_crockp, incr20per_amzn_ep, incr20per_amzn_ip,
  incr20per_clust1_brev, incr20per_clust3_crockp,
  incr20per_clust4_brev, incr20per_clust5_crockp,

  # incr5per_* columns
  incr5per_3p_ep, incr5per_amzn_brev, incr5per_amzn_csri,
  incr5per_amzn_crockp, incr5per_amzn_ep, incr5per_amzn_ip,
  incr5per_clust1_brev, incr5per_clust3_crockp,
  incr5per_clust4_brev, incr5per_clust5_crockp,

  weekend,

  # more additional variables - in absence of review text
  max_Product_reviews_IP_lag1,
  Product_reviews_EP_lag1,
  Product_reviews_brv_lag1,
  Product_reviews_cosori_lag1,
  Product_reviews_crockp_lag1,
  mean_product_star_rate_IP_lag1,
  # disabled: product_star_avbay_lag1
  product_star_brv_lag1,
  # disabled: product_star_cmax_lag1
  product_star_cnart_lag1,
  product_star_cosori_lag1,
  product_star_crockp_lag1,
  # disabled: product_star_elec_lag1
  product_star_EP_lag1,
  # disabled: product_star_gkchf_lag1, product_star_hbeach_lag1,
  #           product_star_mmill_lag1, product_star_oyama_lag1,
  #           product_star_ppro_lag1

  # additional variables after RnR 2
  mean_sellerstar_clust1_lag1,
  mean_sellerstar_clust2_lag1,
  mean_sellerstar_clust3_lag1,
  mean_sellerstar_clust4_lag1,
  mean_sellerstar_clust5_lag1,
  seasonal_sale,

  # broken down external site price changes with lags
  maxpricechange_HD, maxpricechange_HDlag1,
  maxpricechange_HDlag2, maxpricechange_HDlag3,
  maxpricechange_sears, maxpricechange_searslag1,
  maxpricechange_searslag2, maxpricechange_searslag3,
  maxpricechange_wm, maxpricechange_wmlag1,
  maxpricechange_wmlag2, maxpricechange_wmlag3
))

# Train/test partition for cluster 5. Row indices refer to the rows of
# data_clust5, trainY and Y_clust5_crockp. Each row set is defined once so
# that the predictors and the response are always cut with the same indices.
clust5_train_rows <- c(1:90, 181:271, 362:421)
clust5_test_blocks <- list(91:180, 272:361, 422:451)

# The response is selected by column name instead of by position, so a change
# in the column order of trainY fails loudly ("subscript out of bounds")
# rather than silently training on a different series.
trainY_clust5 <- trainY[clust5_train_rows, "Y_clust5_crockp"]
train_gam_clust5 <- data_clust5[clust5_train_rows, ]

# The three test blocks stay available individually as well as stacked.
test1_gam_clust5 <- data_clust5[clust5_test_blocks[[1]], ]
test2_gam_clust5 <- data_clust5[clust5_test_blocks[[2]], ]
test3_gam_clust5 <- data_clust5[clust5_test_blocks[[3]], ]
test_gam_clust5 <- rbind(test1_gam_clust5, test2_gam_clust5, test3_gam_clust5)
testY_clust5 <- Y_clust5_crockp[unlist(clust5_test_blocks)]


#with FE from RnR

# Cluster-5 model: Gaussian family with identity link, fitted by REML on
# train_gam_clust5 with trainY_clust5 as the response. Every active term
# enters the formula as a plain (parametric) term; s() smooths occur only
# among the disabled candidates listed below.
#
# Disabled candidate terms, kept for reference (add to the formula to
# re-enable):
#   Y_clust5_crockp_lag1, Y_clust5_crockp_lag3, s(Freeship_used3p_lag1),
#   min_cumsalefreq_clust4_lag1, s(Y_amzn_cosori_lag1),
#   min_cumsalefreq_clust3_lag1, Y_amzn_cosori_lag2, s(buyboxprice_EP_lag1),
#   Y_clust4_breville_lag1,
#   decr10per_3p_ep, decr10per_amzn_brev, decr10per_amzn_csri,
#   decr10per_amzn_ep, decr10per_amzn_ip, decr10per_clust3_crockp,
#   decr10per_amzn_crockp, decr5per_amzn_crockp, decr10per_clust5_crockp,
#   decr20per_amzn_brev, decr20per_amzn_csri, decr20per_amzn_ep,
#   decr20per_amzn_ip, decr20per_clust3_crockp, decr20per_clust4_brev,
#   decr20per_clust5_crockp,
#   decr5per_3p_ep, decr5per_amzn_brev, decr5per_amzn_csri,
#   decr5per_amzn_ep, decr5per_clust4_brev, decr5per_clust5_crockp,
#   ep_bottombrandlag1,
#   incr10per_3p_ep, incr10per_amzn_csri, incr10per_amzn_ep,
#   incr10per_amzn_ip, incr10per_clust5_crockp,
#   incr20per_3p_ep, incr20per_amzn_brev, incr20per_amzn_csri,
#   incr20per_amzn_ep, incr20per_amzn_ip, incr20per_clust4_brev,
#   incr20per_clust5_crockp,
#   incr5per_3p_ep, incr5per_amzn_brev, incr5per_amzn_csri,
#   incr5per_amzn_ep, incr5per_amzn_ip, incr5per_clust3_crockp,
#   incr5per_clust4_brev,
#   ip_topbrandlag1,
#   max_Product_reviews_IP_lag1, Product_reviews_EP_lag1,
#   Product_reviews_brv_lag1, Product_reviews_cosori_lag1,
#   mean_product_star_rate_IP_lag1,
#   mean_sellerstar_clust1_lag1, mean_sellerstar_clust5_lag1,
#   4th round additions: maxpricechange_HD, maxpricechange_HDlag1,
#   maxpricechange_sears, maxpricechange_searslag1, maxpricechange_wm
gam1_clust5 <- gam(
  trainY_clust5 ~
    mean_uniq_ASIN_clust5_lag1 +
    buyboxprice_crockp_lag1 +
    decr10per_clust4_brev +
    decr20per_amzn_crockp +
    incr10per_amzn_crockp +
    decr5per_amzn_ip +
    decr5per_clust3_crockp +
    Freeship_used3p_lag1 +
    incr10per_amzn_brev +
    incr10per_clust3_crockp +
    incr10per_clust4_brev +
    incr20per_clust3_crockp +
    incr5per_clust5_crockp +
    Product_reviews_crockp_lag1 +
    product_star_brv_lag1 +
    weekend +
    mean_sellerstar_clust2_lag1 +
    mean_sellerstar_clust3_lag1 +
    mean_sellerstar_clust4_lag1 +
    seasonal_sale +
    # 4th round additions
    maxpricechange_wmlag1,
  family = gaussian(link = 'identity'),
  data = train_gam_clust5,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

# Cluster 5: summarise the fitted model, predict for the stacked test rows,
# and store the observed test response next to the predictions. The CSV is
# written relative to the current working directory.
summary(gam1_clust5)
pred_clust5_gam1 <- predict(gam1_clust5, newdata = test_gam_clust5)

forecast_clust5 <- cbind(
  testY_clust5 = testY_clust5,
  pred_clust5_gam1 = pred_clust5_gam1
)
write.csv(forecast_clust5, file = 'forecast_clust5.csv')
